<!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" data-whc_version="25.0">
    <head>
        <!-- Favicons -->
        <link rel="shortcut icon" href="../../../oxygen-webhelp/template/images/favicon.png"/>
        <link rel="icon" href="../../../oxygen-webhelp/template/images/favicon.png"/>

        <!-- Encoding, viewport and rendering-mode hints -->
        <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
        <meta name="viewport" content="width=device-width, initial-scale=1.0"/>
        <meta http-equiv="X-UA-Compatible" content="IE=edge"/>

        <!-- Publication metadata -->
        <meta name="copyright" content="(C) Copyright 2024"/>
        <meta name="generator" content="DITA-OT"/>
        <meta name="description" content="A cluster pipeline is a pipeline that runs in cluster execution mode. Important: This functionality is deprecated and may be removed in a future release. StreamSets recommends using StreamSets ..."/>
        <meta name="prodname" content="Data Collector"/>
        <meta name="version" content="3"/>
        <meta name="release" content="16"/>
        <meta name="modification" content="0"/>

        <title>Cluster Pipelines (deprecated)</title>
        <!--  Generated with Oxygen version 25.1, build number 2023042410.  -->

        <!-- WebHelp page metadata: relative path to the output root, the TOC entry id of this
             topic, and the source / output paths of the topic -->
        <meta name="wh-path2root" content="../../../"/>
        <meta name="wh-toc-id" content="concept_hmh_kfn_1s-d16893e64701"/>
        <meta name="wh-source-relpath" content="datacollector/UserGuide/Cluster_Mode/ClusterPipelines.dita"/>
        <meta name="wh-out-relpath" content="datacollector/UserGuide/Cluster_Mode/ClusterPipelines.html"/>

        <!-- WebHelp application style sheets -->
        <link rel="stylesheet" type="text/css" href="../../../oxygen-webhelp/app/commons.css?buildId=2023042410"/>
        <link rel="stylesheet" type="text/css" href="../../../oxygen-webhelp/app/topic.css?buildId=2023042410"/>

        <!-- Scripts loaded without defer: options, localization strings and search keywords -->
        <script src="../../../oxygen-webhelp/app/options/properties.js?buildId=20240802104629"></script>
        <script src="../../../oxygen-webhelp/app/localization/strings.js?buildId=2023042410"></script>
        <script src="../../../oxygen-webhelp/app/search/index/keywords.js?buildId=20240802104629"></script>

        <!-- Deferred application scripts -->
        <script defer="defer" src="../../../oxygen-webhelp/app/commons.js?buildId=2023042410"></script>
        <script defer="defer" src="../../../oxygen-webhelp/app/topic.js?buildId=2023042410"></script>

        <!-- Template and skin style sheets; they come after the application style sheets -->
        <link rel="stylesheet" type="text/css" href="../../../oxygen-webhelp/template/light.css?buildId=2023042410"/>
        <link rel="stylesheet" type="text/css" href="../../../skin.css"/>
    </head>

    <body class="wh_topic_page frmBody">
        
        
        

        
<!-- Page header: the publication title (a link to the home page) and the link to the index terms
     page. The aria-label gives this navigation landmark its own accessible name, because the
     page contains more than one <nav> element. -->
<nav class="navbar navbar-default wh_header" data-whc_version="25.0" aria-label="Publication">
    <div class="container-fluid">
        <div class="wh_header_flex_container navbar-nav navbar-expand-md navbar-dark">
            <div class="wh_logo_and_publication_title_container">
                <div class="wh_logo_and_publication_title">
                    
                    <!--
                            This component will be generated when the next parameters are specified in the transformation scenario:
                            'webhelp.logo.image' and 'webhelp.logo.image.target.url'.
                            See: http://oxygenxml.com/doc/versions/17.1/ug-editor/#topics/dita_webhelp_output.html.
                    -->
                    
                    <div class=" wh_publication_title "><a href="../../../index.html"><span class="booktitle">  <span class="ph mainbooktitle"><span class="ph">Data Collector</span> User Guide</span>  </span></a></div>
                    
                </div>
                
                <!-- The menu button for mobile devices is copied in the output only when the 'webhelp.show.top.menu' parameter is set to 'yes' -->
                
            </div>

            <div class="wh_top_menu_and_indexterms_link collapse navbar-collapse">
                
                
                <div class=" wh_indexterms_link "><a href="../../../indexTerms.html" title="Index" aria-label="Go to index terms page"><span>Index</span></a></div>
                
            </div>
        </div>
    </div>
</nav>

        <!-- Topic-page search box. The form sends the typed text as the "searchQuery" GET parameter
             to search.html; the field is marked required.
             NOTE(review): this wrapper carries role="form" while the nested <form> already has
             role="search" - confirm that the outer role is intentional. -->
        <div class=" wh_search_input navbar-form wh_topic_page_search search " role="form">


<form id="searchForm" method="get" role="search" action="../../../search.html"><div><input type="search" placeholder="Search " class="wh_search_textfield" id="textToSearch" name="searchQuery" aria-label="Search query" required="required"/><button type="submit" class="wh_search_button" aria-label="Search"><span class="search_input_text">Search</span></button></div></form>

</div>
        
        <div class="container-fluid">
            <div class="row">

                <!-- Topic toolbar: breadcrumb trail, search-highlight toggle, expand/collapse-sections
                     toggle, previous/next topic links and the print button.
                     The aria-label gives this navigation landmark its own accessible name, and
                     aria-current="page" marks the breadcrumb entry that points at this topic. -->
                <nav class="wh_tools d-print-none" aria-label="Topic tools">
                    
<div data-tooltip-position="bottom" class=" wh_breadcrumb "><ol class="d-print-none"><li><span class="home"><a href="../../../index.html"><span>Home</span></a></span></li><li><div class="topicref" data-id="concept_fpz_5r4_vs"><div class="title"><a href="../../../datacollector/UserGuide/Cluster_Mode/ClusterPipelines_title.html">Cluster Pipelines</a></div></div></li><li class="active"><div class="topicref" data-id="concept_hmh_kfn_1s"><div class="title"><a href="../../../datacollector/UserGuide/Cluster_Mode/ClusterPipelines.html#concept_hmh_kfn_1s" aria-current="page">Cluster Pipelines (deprecated)</a><div class="wh-tooltip"><p class="shortdesc"></p></div></div></div></li></ol></div>



                    <!-- The buttons are icon-only, so each one is named through aria-label; the explicit
                         type="button" keeps them from ever acting as submit buttons. -->
                    <div class="wh_right_tools "><button type="button" class="wh_hide_highlight" aria-label="Toggle search highlights" title="Toggle search highlights"></button><button type="button" class="webhelp_expand_collapse_sections" data-next-state="collapsed" aria-label="Collapse sections" title="Collapse sections"></button><div class=" wh_navigation_links "><span id="topic_navigation_links" class="navheader">
  
<span class="navprev"><a class="- topic/link link" href="../../../datacollector/UserGuide/Cluster_Mode/ClusterPipelines_title.html" title="Cluster Pipelines" aria-label="Previous topic: Cluster Pipelines" rel="prev"></a></span>  
<span class="navnext"><a class="- topic/link link" href="../../../datacollector/UserGuide/Cluster_Mode/KafkaRequirements.html#task_gmd_msw_yr" title="Kafka Cluster Requirements" aria-label="Next topic: Kafka Cluster Requirements" rel="next"></a></span>  </span></div>
<!--External resource link-->
<!-- The handler attribute is written in lowercase so that it is also recognised when the document is parsed as XML. -->
<div class=" wh_print_link print d-none d-md-inline-block "><button type="button" onclick="window.print()" title="Print this page" aria-label="Print this page"></button></div>
                        
                        
                        
                        
                    </div>
                </nav>
            </div>

            

<div class="wh_content_area">
                <div class="row">
                    


                        <nav role="navigation" id="wh_publication_toc" class="col-lg-3 col-md-3 col-sm-12 d-md-block d-none d-print-none">
<div id="wh_publication_toc_content">


                            <div class=" wh_publication_toc " data-tooltip-position="right"><span class="expand-button-action-labels"><span id="button-expand-action" role="button" aria-label="Expand"></span><span id="button-collapse-action" role="button" aria-label="Collapse"></span><span id="button-pending-action" role="button" aria-label="Pending"></span></span><ul role="tree" aria-label="Table of Contents"><li role="treeitem" aria-expanded="false"><div data-tocid="concept_htw_ghg_jq-d16893e53" class="topicref" data-id="concept_htw_ghg_jq" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_htw_ghg_jq-d16893e53-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Getting_Started/GettingStarted_Title.html#concept_htw_ghg_jq" id="concept_htw_ghg_jq-d16893e53-link">Getting Started</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_l2v_nlp_mpb-d16893e331" class="topicref" data-id="concept_l2v_nlp_mpb" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_l2v_nlp_mpb-d16893e331-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/ReleaseNotes/ReleaseNotes.html#concept_l2v_nlp_mpb" id="concept_l2v_nlp_mpb-d16893e331-link">Release Notes</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_l4q_flb_kr-d16893e2582" class="topicref" data-id="concept_l4q_flb_kr" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_l4q_flb_kr-d16893e2582-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Installation/Install_title.html" id="concept_l4q_flb_kr-d16893e2582-link">Installation</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_ylh_yyz_ky-d16893e3984" class="topicref" data-id="concept_ylh_yyz_ky" 
data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_ylh_yyz_ky-d16893e3984-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Configuration/Config_title.html" id="concept_ylh_yyz_ky-d16893e3984-link">Configuration</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_ejk_f1f_5v-d16893e7058" class="topicref" data-id="concept_ejk_f1f_5v" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_ejk_f1f_5v-d16893e7058-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Upgrade/Upgrade_title.html" id="concept_ejk_f1f_5v-d16893e7058-link">Upgrade</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_qsw_cjy_bt-d16893e10103" class="topicref" data-id="concept_qsw_cjy_bt" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_qsw_cjy_bt-d16893e10103-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Pipeline_Design/PipelineDesign_title.html" id="concept_qsw_cjy_bt-d16893e10103-link">Pipeline Concepts and Design</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_qn1_wn4_kq-d16893e11199" class="topicref" data-id="concept_qn1_wn4_kq" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_qn1_wn4_kq-d16893e11199-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Pipeline_Configuration/PipelineConfiguration_title.html" id="concept_qn1_wn4_kq-d16893e11199-link">Pipeline Configuration</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_hdr_gyw_41b-d16893e13057" class="topicref" data-id="concept_hdr_gyw_41b" data-state="not-ready"><span role="button" tabindex="0" 
aria-labelledby="button-expand-action concept_hdr_gyw_41b-d16893e13057-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Data_Formats/DataFormats-Title.html" id="concept_hdr_gyw_41b-d16893e13057-link">Data Formats</a><div class="wh-tooltip"><p class="shortdesc"></p></div></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_yjl_nc5_jq-d16893e14164" class="topicref" data-id="concept_yjl_nc5_jq" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_yjl_nc5_jq-d16893e14164-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Origins/Origins_title.html" id="concept_yjl_nc5_jq-d16893e14164-link">Origins</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_yjl_nc5_jq-d16893e35197" class="topicref" data-id="concept_yjl_nc5_jq" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_yjl_nc5_jq-d16893e35197-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Processors/Processors_title.html" id="concept_yjl_nc5_jq-d16893e35197-link">Processors</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_agj_cfj_br-d16893e44037" class="topicref" data-id="concept_agj_cfj_br" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_agj_cfj_br-d16893e44037-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Destinations/Destinations-title.html" id="concept_agj_cfj_br-d16893e44037-link">Destinations</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_umc_1lk_fx-d16893e56072" class="topicref" data-id="concept_umc_1lk_fx" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action 
concept_umc_1lk_fx-d16893e56072-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Executors/Executors-title.html" id="concept_umc_1lk_fx-d16893e56072-link">Executors</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_xxd_f5r_kx-d16893e59696" class="topicref" data-id="concept_xxd_f5r_kx" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_xxd_f5r_kx-d16893e59696-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Event_Handling/EventFramework-Title.html#concept_xxd_f5r_kx" id="concept_xxd_f5r_kx-d16893e59696-link">Dataflow Triggers</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_zq5_pb4_flb-d16893e60134" class="topicref" data-id="concept_zq5_pb4_flb" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_zq5_pb4_flb-d16893e60134-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Solutions/Solutions-title.html" id="concept_zq5_pb4_flb-d16893e60134-link">Solutions</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_ugp_kwf_xw-d16893e61337" class="topicref" data-id="concept_ugp_kwf_xw" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_ugp_kwf_xw-d16893e61337-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/DPM/DPM_title.html" id="concept_ugp_kwf_xw-d16893e61337-link">StreamSets Control Hub</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_fyf_gkq_4bb-d16893e62693" class="topicref" data-id="concept_fyf_gkq_4bb" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_fyf_gkq_4bb-d16893e62693-link" class="wh-expand-btn"></span><div class="title"><a 
href="../../../datacollector/UserGuide/Edge_Mode/EdgePipelines_title.html" id="concept_fyf_gkq_4bb-d16893e62693-link"><span class="ph">StreamSets Data Collector Edge</span></a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_wwq_gxc_py-d16893e63980" class="topicref" data-id="concept_wwq_gxc_py" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_wwq_gxc_py-d16893e63980-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Multithreaded_Pipelines/MultithreadedPipelines.html#concept_wwq_gxc_py" id="concept_wwq_gxc_py-d16893e63980-link">Multithreaded Pipelines</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_gzw_tdm_p2b-d16893e64187" class="topicref" data-id="concept_gzw_tdm_p2b" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_gzw_tdm_p2b-d16893e64187-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Microservice/Microservice_Title.html#concept_gzw_tdm_p2b" id="concept_gzw_tdm_p2b-d16893e64187-link">Microservice Pipelines</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="Orchestrators_Title-d16893e64348" class="topicref" data-id="Orchestrators_Title" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action Orchestrators_Title-d16893e64348-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Orchestration_Pipelines/OrchestrationPipelines_Title.html#Orchestrators_Title" id="Orchestrators_Title-d16893e64348-link">Orchestration Pipelines</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_wr1_ktz_bt-d16893e64489" class="topicref" data-id="concept_wr1_ktz_bt" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action 
concept_wr1_ktz_bt-d16893e64489-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/RPC_Pipelines/SDC_RPCpipelines_title.html#concept_wr1_ktz_bt" id="concept_wr1_ktz_bt-d16893e64489-link">SDC RPC Pipelines</a></div></div></li><li role="treeitem" aria-expanded="true"><div data-tocid="concept_fpz_5r4_vs-d16893e64679" class="topicref" data-id="concept_fpz_5r4_vs" data-state="expanded"><span role="button" tabindex="0" aria-labelledby="button-collapse-action concept_fpz_5r4_vs-d16893e64679-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Cluster_Mode/ClusterPipelines_title.html" id="concept_fpz_5r4_vs-d16893e64679-link">Cluster Pipelines</a></div></div><ul role="group" class="navbar-nav nav-list"><li role="treeitem" aria-expanded="true" class="active"><div data-tocid="concept_hmh_kfn_1s-d16893e64701" class="topicref" data-id="concept_hmh_kfn_1s" data-state="expanded"><span role="button" tabindex="0" aria-labelledby="button-collapse-action concept_hmh_kfn_1s-d16893e64701-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Cluster_Mode/ClusterPipelines.html#concept_hmh_kfn_1s" id="concept_hmh_kfn_1s-d16893e64701-link">Cluster Pipelines (deprecated)</a><div class="wh-tooltip"><p class="shortdesc"></p></div></div></div><ul role="group" class="navbar-nav nav-list"><li role="treeitem"><div data-tocid="concept_rjc_4m5_lx-d16893e64725" class="topicref" data-id="concept_rjc_4m5_lx" data-state="leaf"><span role="button" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Cluster_Mode/ClusterPipelines.html#concept_rjc_4m5_lx" id="concept_rjc_4m5_lx-d16893e64725-link">Cluster Batch and Streaming Execution Modes</a><div class="wh-tooltip"><p class="shortdesc"><span class="ph">Data Collector</span>         can run a cluster pipeline using cluster batch or cluster streaming execution         
mode.</p></div></div></div></li><li role="treeitem"><div data-tocid="concept_ywt_vp3_vdb-d16893e64752" class="topicref" data-id="concept_ywt_vp3_vdb" data-state="leaf"><span role="button" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Cluster_Mode/ClusterPipelines.html#concept_ywt_vp3_vdb" id="concept_ywt_vp3_vdb-d16893e64752-link">Data Collector Configuration</a></div></div></li><li role="treeitem"><div data-tocid="concept_rmd_hgp_cw-d16893e64774" class="topicref" data-id="concept_rmd_hgp_cw" data-state="leaf"><span role="button" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Cluster_Mode/ClusterPipelines.html#concept_rmd_hgp_cw" id="concept_rmd_hgp_cw-d16893e64774-link">Enable HTTPS</a><div class="wh-tooltip"><p class="shortdesc">You can enable <span class="ph">Data Collector</span> to use         HTTPS when you run cluster pipelines. By default <span class="ph">Data Collector</span> uses         HTTP.</p></div></div></div></li><li role="treeitem"><div data-tocid="concept_cwy_xgl_cgb-d16893e64805" class="topicref" data-id="concept_cwy_xgl_cgb" data-state="leaf"><span role="button" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Cluster_Mode/ClusterPipelines.html#concept_cwy_xgl_cgb" id="concept_cwy_xgl_cgb-d16893e64805-link">Temporary Directory</a><div class="wh-tooltip"><p class="shortdesc"><span class="ph">Data Collector</span>         requires that the Java temporary directory on the gateway node in the cluster is         writable.</p></div></div></div></li><li role="treeitem"><div data-tocid="concept_iyx_23c_j2b-d16893e64832" class="topicref" data-id="concept_iyx_23c_j2b" data-state="leaf"><span role="button" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Cluster_Mode/ClusterPipelines.html#concept_iyx_23c_j2b" id="concept_iyx_23c_j2b-d16893e64832-link">Logs</a><div class="wh-tooltip"><p 
class="shortdesc">Because cluster pipelines run as either MapReduce or Spark applications, each <span class="ph">Data Collector</span>&#160;worker         in the cluster manages its own log.&#160;</p></div></div></div></li><li role="treeitem"><div data-tocid="concept_cs4_lcg_j5-d16893e64860" class="topicref" data-id="concept_cs4_lcg_j5" data-state="leaf"><span role="button" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Cluster_Mode/ClusterPipelines.html#concept_cs4_lcg_j5" id="concept_cs4_lcg_j5-d16893e64860-link">Checkpoint Storage for Streaming Pipelines</a><div class="wh-tooltip"><p class="shortdesc">When <span class="ph">Data Collector</span> runs a         cluster streaming pipeline, <span class="ph">Data Collector</span>         generates and stores checkpoint metadata. The checkpoint metadata provides the offset for         the origin.</p></div></div></div></li><li role="treeitem"><div data-tocid="concept_xxz_nft_ls-d16893e64891" class="topicref" data-id="concept_xxz_nft_ls" data-state="leaf"><span role="button" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Cluster_Mode/ClusterPipelines.html#concept_xxz_nft_ls" id="concept_xxz_nft_ls-d16893e64891-link">Error Handling Limitation</a></div></div></li><li role="treeitem"><div data-tocid="concept_fk4_gd4_1s-d16893e64913" class="topicref" data-id="concept_fk4_gd4_1s" data-state="leaf"><span role="button" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Cluster_Mode/ClusterPipelines.html#concept_fk4_gd4_1s" id="concept_fk4_gd4_1s-d16893e64913-link">Monitoring and Snapshot</a></div></div></li></ul></li><li role="treeitem" aria-expanded="false"><div data-tocid="task_gmd_msw_yr-d16893e64935" class="topicref" data-id="task_gmd_msw_yr" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action task_gmd_msw_yr-d16893e64935-link" class="wh-expand-btn"></span><div 
class="title"><a href="../../../datacollector/UserGuide/Cluster_Mode/KafkaRequirements.html#task_gmd_msw_yr" id="task_gmd_msw_yr-d16893e64935-link">Kafka Cluster Requirements</a><div class="wh-tooltip"><p class="shortdesc"></p></div></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_kry_gn5_lx-d16893e65003" class="topicref" data-id="concept_kry_gn5_lx" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_kry_gn5_lx-d16893e65003-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Cluster_Mode/MapRRequirements.html#concept_kry_gn5_lx" id="concept_kry_gn5_lx-d16893e65003-link">MapR Requirements</a><div class="wh-tooltip"><p class="shortdesc"></p></div></div></div></li><li role="treeitem"><div data-tocid="task_akz_w5b_ws-d16893e65049" class="topicref" data-id="task_akz_w5b_ws" data-state="leaf"><span role="button" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Cluster_Mode/HDFSRequirements.html#task_akz_w5b_ws" id="task_akz_w5b_ws-d16893e65049-link">HDFS Requirements</a><div class="wh-tooltip"><p class="shortdesc"></p></div></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_opj_jmf_f2b-d16893e65073" class="topicref" data-id="concept_opj_jmf_f2b" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_opj_jmf_f2b-d16893e65073-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Cluster_Mode/AmazonS3Requirements.html#concept_opj_jmf_f2b" id="concept_opj_jmf_f2b-d16893e65073-link">Amazon S3 Requirements</a><div class="wh-tooltip"><p class="shortdesc"></p></div></div></div></li><li role="treeitem"><div data-tocid="concept_pdf_r5y_fz-d16893e65147" class="topicref" data-id="concept_pdf_r5y_fz" data-state="leaf"><span role="button" class="wh-expand-btn"></span><div class="title"><a 
href="../../../datacollector/UserGuide/Cluster_Mode/StageLimitations.html#concept_pdf_r5y_fz" id="concept_pdf_r5y_fz-d16893e65147-link">Cluster Pipeline Limitations</a><div class="wh-tooltip"><p class="shortdesc"></p></div></div></div></li></ul></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_jjk_23z_sq-d16893e65172" class="topicref" data-id="concept_jjk_23z_sq" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_jjk_23z_sq-d16893e65172-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Data_Preview/DataPreview_Title.html#concept_jjk_23z_sq" id="concept_jjk_23z_sq-d16893e65172-link">Data Preview</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_pgk_brx_rr-d16893e65458" class="topicref" data-id="concept_pgk_brx_rr" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_pgk_brx_rr-d16893e65458-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Alerts/RulesAlerts_title.html#concept_pgk_brx_rr" id="concept_pgk_brx_rr-d16893e65458-link">Rules and Alerts</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_asx_fdz_sq-d16893e65960" class="topicref" data-id="concept_asx_fdz_sq" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_asx_fdz_sq-d16893e65960-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Pipeline_Monitoring/PipelineMonitoring_title.html#concept_asx_fdz_sq" id="concept_asx_fdz_sq-d16893e65960-link">Pipeline Monitoring</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_o3l_dtr_5q-d16893e66304" class="topicref" data-id="concept_o3l_dtr_5q" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action 
concept_o3l_dtr_5q-d16893e66304-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Pipeline_Maintenance/PipelineMaintenance_title.html#concept_o3l_dtr_5q" id="concept_o3l_dtr_5q-d16893e66304-link">Pipeline Maintenance</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_yms_ftm_sq-d16893e66768" class="topicref" data-id="concept_yms_ftm_sq" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_yms_ftm_sq-d16893e66768-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Administration/Administration_title.html#concept_yms_ftm_sq" id="concept_yms_ftm_sq-d16893e66768-link">Administration</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_nls_w1r_ks-d16893e67508" class="topicref" data-id="concept_nls_w1r_ks" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_nls_w1r_ks-d16893e67508-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Tutorial/Tutorial-title.html" id="concept_nls_w1r_ks-d16893e67508-link">Tutorial</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_sh3_frm_tq-d16893e68001" class="topicref" data-id="concept_sh3_frm_tq" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_sh3_frm_tq-d16893e68001-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Troubleshooting/Troubleshooting_title.html#concept_sh3_frm_tq" id="concept_sh3_frm_tq-d16893e68001-link">Troubleshooting</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_xbx_rs1_tq-d16893e68798" class="topicref" data-id="concept_xbx_rs1_tq" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action 
concept_xbx_rs1_tq-d16893e68798-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Glossary/Glossary_title.html#concept_xbx_rs1_tq" id="concept_xbx_rs1_tq-d16893e68798-link">Glossary</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_jn1_nzb_kv-d16893e68843" class="topicref" data-id="concept_jn1_nzb_kv" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_jn1_nzb_kv-d16893e68843-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Apx-DataFormats/DataFormat_Title.html#concept_jn1_nzb_kv" id="concept_jn1_nzb_kv-d16893e68843-link">Data Formats by Stage</a><div class="wh-tooltip"><p class="shortdesc"></p></div></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_pvm_yt3_wq-d16893e68958" class="topicref" data-id="concept_pvm_yt3_wq" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_pvm_yt3_wq-d16893e68958-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Expression_Language/ExpressionLanguage_title.html" id="concept_pvm_yt3_wq-d16893e68958-link">Expression Language</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_vcj_1ws_js-d16893e69669" class="topicref" data-id="concept_vcj_1ws_js" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_vcj_1ws_js-d16893e69669-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Apx-RegEx/RegEx-Title.html#concept_vcj_1ws_js" id="concept_vcj_1ws_js-d16893e69669-link">Regular Expressions</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_chv_vmj_wr-d16893e69787" class="topicref" data-id="concept_chv_vmj_wr" data-state="not-ready"><span role="button" tabindex="0" 
aria-labelledby="button-expand-action concept_chv_vmj_wr-d16893e69787-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Apx-GrokPatterns/GrokPatterns_title.html#concept_chv_vmj_wr" id="concept_chv_vmj_wr-d16893e69787-link">Grok Patterns</a></div></div></li></ul></div>
                        

</div>
</nav>
                    


                    
                    <div id="wh_topic_body" class="col-lg-7 col-md-9 col-sm-12">
<button id="wh_close_publication_toc_button" class="close-toc-button d-none" aria-label="Toggle publishing table of content" aria-controls="wh_publication_toc" aria-expanded="true"><span class="close-toc-icon-container"><span class="close-toc-icon"></span></span></button><button id="wh_close_topic_toc_button" class="close-toc-button d-none" aria-label="Toggle topic table of content" aria-controls="wh_topic_toc" aria-expanded="true"><span class="close-toc-icon-container"><span class="close-toc-icon"></span></span></button>

                        
<div class=" wh_topic_content body "><main role="main"><article class="" role="article" aria-labelledby="ariaid-title1"><article class="nested0" aria-labelledby="ariaid-title1" id="concept_hmh_kfn_1s">
  <h1 class="- topic/title title topictitle1" id="ariaid-title1">Cluster Pipelines (deprecated)</h1>
  
  <div class="- topic/body concept/conbody body conbody"><p class="- topic/shortdesc shortdesc"></p>
    <div class="- topic/p p">A <dfn class="- topic/term term">cluster pipeline</dfn> is a pipeline that runs in cluster execution mode. <div class="- topic/note note important note_important"><span class="note__title">Important:</span> <span class="- topic/ph ph" id="concept_hmh_kfn_1s__deprFunctionality">This functionality is <a class="- topic/xref xref" href="../Pipeline_Design/Deprecated.html#concept_pby_dh2_lpb">deprecated</a> and may be removed in a future release.</span>
                  <span class="- topic/ph ph" id="concept_hmh_kfn_1s__dep-rep-cluster"><span class="- topic/ph ph" id="concept_hmh_kfn_1s__UseTx"><span class="- topic/ph ph">StreamSets</span>
                              recommends using <span class="- topic/ph ph">StreamSets</span>
                              Transformer instead. For more information, see the <a class="- topic/xref xref" href="https://streamsets.com/documentation/transformer/latest/help/index.html?contextID=concept_a1b_zf4_pgb" target="_blank" rel="external noopener">Transformer
                                    documentation</a>.</span></span></div></div>
    <p class="- topic/p p">You can run a pipeline in standalone execution mode or cluster execution mode. In standalone
      mode, a single <span class="- topic/ph ph">Data Collector</span> process
      runs the pipeline. A pipeline runs in standalone mode by default. </p>
    <p class="- topic/p p">In cluster mode, the <span class="- topic/ph ph">Data Collector</span> uses a
      cluster manager and a cluster application to spawn additional workers as needed. Use cluster
      mode to read data from a Kafka cluster, MapR cluster, HDFS, or Amazon S3.</p>
    <p class="- topic/p p">When would you choose standalone or cluster mode? Say you want to ingest logs from
      application servers and perform a computationally expensive transformation. To do this, you
      might use a set of standalone pipelines to stream log data from each application server to a
      Kafka or MapR cluster. You can then use a cluster pipeline to process the data from the cluster
      and perform the expensive transformation.</p>
    <div class="- topic/p p">Or, you might use cluster mode to move data from HDFS to another destination, such as
          Elasticsearch.<div class="- topic/note note note note_note"><span class="note__title">Note:</span> <span class="- topic/ph ph">MapR is now <span class="- topic/ph ph">HPE Ezmeral Data Fabric</span>. At
                        times, this documentation uses "MapR" to refer to both MapR and <span class="- topic/ph ph">HPE Ezmeral Data Fabric</span>.</span></div></div>
  </div>
<article class="- topic/topic concept/concept topic concept nested1" aria-labelledby="ariaid-title2" id="concept_rjc_4m5_lx">
    <h2 class="- topic/title title topictitle2" id="ariaid-title2">Cluster Batch and Streaming Execution Modes</h2>
    
    <div class="- topic/body concept/conbody body conbody"><p class="- topic/shortdesc shortdesc"><span class="- topic/ph ph">Data Collector</span>
        can run a cluster pipeline using cluster batch or cluster streaming execution
        mode.</p>
        <p class="- topic/p p">The execution mode that <span class="- topic/ph ph">Data Collector</span> can
            use depends on the origin system that the cluster pipeline reads from:</p>
        <dl class="- topic/dl dl">
            
                <dt class="- topic/dt dt dlterm">Kafka cluster</dt>
                <dd class="- topic/dd dd"><span class="- topic/ph ph">Data Collector</span> can process data from a Kafka cluster in cluster streaming mode. In cluster
                    streaming mode, <span class="- topic/ph ph">Data Collector</span> processes data continuously until you stop the pipeline. </dd>
                <dd class="- topic/dd dd ddexpand"><span class="- topic/ph ph">Data Collector</span> runs as an application within Spark Streaming, an open source
                    cluster-computing application. </dd>
                <dd class="- topic/dd dd ddexpand">Spark Streaming runs on the YARN cluster manager to process data from a Kafka
                    cluster. The cluster manager and Spark Streaming spawn a <span class="- topic/ph ph">Data Collector</span> worker for each topic partition in the Kafka cluster. As a result, each
                    partition has a <span class="- topic/ph ph">Data Collector</span> worker to process data. If you add a partition to the Kafka topic, you must
                    restart the pipeline to enable the <span class="- topic/ph ph">Data Collector</span> to generate a new worker to read from the new partition. </dd>
                <dd class="- topic/dd dd ddexpand">When Spark Streaming runs on YARN, you can limit the number of workers spawned
                    by configuring the <a class="- topic/xref xref" href="KafkaRequirements.html#task_hhk_bfv_cy__Kafka-YARNWorker">Worker
                        Count cluster pipeline property</a>. You can also use the Extra Spark
                    Configuration property to pass Spark configurations to the spark-submit script.
                    In addition, you can configure the Kafka Consumer origin in a cluster streaming
                    pipeline on YARN to <a class="- topic/xref xref" href="../Pipeline_Configuration/KafkaSecurity.html#concept_jpy_ln5_klb">connect securely</a> through SSL/TLS, Kerberos, or both.<p class="- topic/p p">Use the Kafka
                        Consumer origin to process data from a Kafka cluster in cluster streaming
                        mode.</p></dd>
            
        </dl>
        <dl class="- topic/dl dl">
            
                <dt class="- topic/dt dt dlterm">MapR cluster</dt>
                <dd class="- topic/dd dd"><span class="- topic/ph ph">Data Collector</span> can process data from a MapR cluster in cluster batch mode.</dd>
                <dd class="- topic/dd dd ddexpand">In cluster batch mode, <span class="- topic/ph ph">Data Collector</span> processes all available data and then stops the pipeline. <span class="- topic/ph ph">Data Collector</span> runs as an application on top of MapReduce, an open-source cluster-computing
                    framework. MapReduce runs on a YARN cluster manager. YARN and MapReduce generate
                    additional worker nodes as needed. MapReduce creates one map task for each MapR
                    FS block.<p class="- topic/p p">Use the MapR FS origin to process data from MapR in cluster batch
                        mode.</p></dd>
            
            
                <dt class="- topic/dt dt dlterm">HDFS</dt>
                <dd class="- topic/dd dd"><span class="- topic/ph ph">Data Collector</span> can process data from HDFS in cluster batch mode. In cluster batch mode, <span class="- topic/ph ph">Data Collector</span> processes all available data and then stops the pipeline. </dd>
                <dd class="- topic/dd dd ddexpand"><span class="- topic/ph ph">Data Collector</span> runs as an application on top of MapReduce, an open-source cluster-computing
                    framework. MapReduce runs on a YARN cluster manager. YARN and MapReduce generate
                    additional worker nodes as needed. MapReduce creates one map task for each HDFS
                    block. <p class="- topic/p p">Use the Hadoop FS origin to process data from HDFS in cluster batch
                        mode.</p></dd>
            
            
                <dt class="- topic/dt dt dlterm">Amazon S3</dt>
                <dd class="- topic/dd dd">
                    <div class="- topic/p p"><span class="- topic/ph ph">Data Collector</span> can process data from Amazon S3 in the following cluster batch modes:<ul class="- topic/ul ul" id="concept_rjc_4m5_lx__ul_zlk_1z5_g2b" data-ofbid="concept_rjc_4m5_lx__ul_zlk_1z5_g2b">
                            <li class="- topic/li li">Cluster EMR batch mode - In cluster EMR batch mode, <span class="- topic/ph ph">Data Collector</span> runs on an Amazon EMR cluster to process Amazon S3 data. <span class="- topic/ph ph">Data Collector</span> can run on an existing EMR cluster or on a new EMR cluster that
                                is provisioned when the pipeline starts. When you provision a new
                                EMR cluster, you can configure whether the cluster remains active or
                                terminates when the pipeline stops.</li>
                            <li class="- topic/li li">Cluster batch mode - In cluster batch mode, <span class="- topic/ph ph">Data Collector</span> runs on a Cloudera distribution of Hadoop (CDH) or Hortonworks
                                Data Platform (HDP) cluster to process Amazon S3 data. </li>
                        </ul></div>
                </dd>
                <dd class="- topic/dd dd ddexpand">
                    <p class="- topic/p p">In either mode, <span class="- topic/ph ph">Data Collector</span> processes all available data and then stops the pipeline. </p>
                </dd>
                <dd class="- topic/dd dd ddexpand">
                    <p class="- topic/p p"><span class="- topic/ph ph">Data Collector</span> runs as an application on top of MapReduce in the EMR, CDH, or HDP
                        cluster. MapReduce runs on a YARN cluster manager. MapReduce creates one map
                        task for each HDFS block.</p>
                </dd>
                <dd class="- topic/dd dd ddexpand">
                    <p class="- topic/p p">Use the Hadoop FS origin to process data from Amazon S3 in cluster EMR or
                        cluster batch mode.</p>
                </dd>
            
        </dl>
    </div>
</article><article class="- topic/topic concept/concept topic concept nested1" aria-labelledby="ariaid-title3" id="concept_ywt_vp3_vdb">
    <h2 class="- topic/title title topictitle2" id="ariaid-title3">Data Collector Configuration</h2>
    <div class="- topic/body concept/conbody body conbody">
        <div class="- topic/p p">When running cluster pipelines, the <span class="- topic/ph ph">Data Collector</span>
            configuration file, <code class="+ topic/ph pr-d/codeph ph codeph">$SDC_CONF/sdc.properties</code>, defined on the gateway
            node is propagated to the worker nodes with the exception of the following
                properties:<ul class="- topic/ul ul" id="concept_ywt_vp3_vdb__ul_ydf_k4j_vdb" data-ofbid="concept_ywt_vp3_vdb__ul_ydf_k4j_vdb">
                <li class="- topic/li li"><code class="+ topic/ph pr-d/codeph ph codeph">sdc.base.http.url</code></li>
                <li class="- topic/li li"><code class="+ topic/ph pr-d/codeph ph codeph">http.bindHost</code></li>
            </ul></div>
        <p class="- topic/p p">If you modify the <code class="+ topic/ph pr-d/codeph ph codeph">sdc.base.http.url</code> and <code class="+ topic/ph pr-d/codeph ph codeph">http.bindHost</code>
            properties on the gateway node to configure a specific host name or port number or to
            configure a specific IP address that <span class="- topic/ph ph">Data Collector</span>
            binds to, the modified values are not propagated to the worker nodes. The worker nodes
            always use the default values for the <code class="+ topic/ph pr-d/codeph ph codeph">sdc.base.http.url</code> and
                <code class="+ topic/ph pr-d/codeph ph codeph">http.bindHost</code> properties so that the worker nodes can dynamically
            determine the host name and can bind to any IP address.</p>
        <div class="- topic/p p">To prevent additional configuration properties from being propagated to the worker nodes,
            add the following property to the <code class="+ topic/ph pr-d/codeph ph codeph">sdc.properties</code> file on the gateway
            node:<pre class="+ topic/pre pr-d/codeblock pre codeblock"><code>cluster.slave.configs.remove=&lt;property1&gt;,&lt;property2&gt;</code></pre></div>
        <p class="- topic/p p">For more information on configuring the <span class="- topic/ph ph">Data Collector</span>
            configuration file, see <span class="- topic/ph ph"><a class="- topic/xref xref" href="../Configuration/DCConfig.html#concept_pq5_xjq_kr">Data Collector Configuration</a></span>.</p>
    </div>
</article><article class="- topic/topic concept/concept topic concept nested1" aria-labelledby="ariaid-title4" id="concept_rmd_hgp_cw">
 <h2 class="- topic/title title topictitle2" id="ariaid-title4">Enable HTTPS</h2>
 
 <div class="- topic/body concept/conbody body conbody"><p class="- topic/shortdesc shortdesc">You can enable <span class="- topic/ph ph">Data Collector</span> to use
        HTTPS when you run cluster pipelines. By default, <span class="- topic/ph ph">Data Collector</span> uses
        HTTP.</p>
  <p class="- topic/p p">To configure HTTPS for cluster pipelines, you first must configure <span class="- topic/ph ph">Data Collector</span> to
            use HTTPS. Then you generate an SSL/TLS certificate for each worker node in the cluster.
                <span class="- topic/ph ph">Data Collector</span>
            runs on the master gateway node in the cluster, so the gateway node uses the same
            keystore file configured for <span class="- topic/ph ph">Data Collector</span>.</p>
        <p class="- topic/p p">You then specify the generated keystore file and keystore password file for the worker
            nodes in the <span class="- topic/ph ph">Data Collector</span>
            configuration file, <code class="+ topic/ph pr-d/codeph ph codeph">$SDC_CONF/sdc.properties</code>. You can optionally
            generate a truststore file for the gateway and worker nodes.</p>
        <p class="- topic/p p"><span class="- topic/ph ph">For more information, see <span class="- topic/ph ph"><a class="- topic/xref xref" href="../Configuration/HTTP_protocols.html#concept_xyp_lt4_cw">Enabling HTTPS</a></span>.</span></p>
 </div>
</article><article class="- topic/topic concept/concept topic concept nested1" aria-labelledby="ariaid-title5" id="concept_cwy_xgl_cgb">
    <h2 class="- topic/title title topictitle2" id="ariaid-title5">Temporary Directory</h2>
    
    <div class="- topic/body concept/conbody body conbody"><p class="- topic/shortdesc shortdesc"><span class="- topic/ph ph">Data Collector</span>
        requires that the Java temporary directory on the gateway node in the cluster is
        writable.</p>
        <p class="- topic/p p">The Java temporary directory is specified by
            the Java system property <code class="+ topic/ph pr-d/codeph ph codeph">java.io.tmpdir</code>. On UNIX, the default value of
            this property is typically <span class="+ topic/ph sw-d/filepath ph filepath">/tmp</span> and is writable. </p>
        <p class="- topic/p p">Before running cluster pipelines, verify that the Java temporary directory on the gateway
            node is writable.</p>
    </div>
</article><article class="- topic/topic concept/concept topic concept nested1" aria-labelledby="ariaid-title6" id="concept_iyx_23c_j2b">
    <h2 class="- topic/title title topictitle2" id="ariaid-title6">Logs</h2>
    
    <div class="- topic/body concept/conbody body conbody"><p class="- topic/shortdesc shortdesc">Because cluster pipelines run as either MapReduce or Spark applications, each <span class="- topic/ph ph">Data Collector</span> worker
        in the cluster manages its own log.</p>
        <p class="- topic/p p">The <span class="- topic/ph ph">Data Collector</span>
            workers send log messages to different locations based on the cluster execution
            mode:</p>
        <dl class="- topic/dl dl">
            
                <dt class="- topic/dt dt dlterm">Cluster batch mode pipelines</dt>
                <dd class="- topic/dd dd">For cluster batch mode pipelines, each <span class="- topic/ph ph">Data Collector</span> worker sends log messages to the syslog file on the worker node. You can use
                    the YARN Resource Manager UI to view the syslog file for each MapReduce
                    task.</dd>
            
            
                <dt class="- topic/dt dt dlterm">Cluster streaming mode pipelines</dt>
                <dd class="- topic/dd dd">For cluster streaming mode pipelines, each <span class="- topic/ph ph">Data Collector</span> worker sends log messages to stderr on the worker node. You can use the Spark
                    UI to view stderr for each Spark application.</dd>
            
        </dl>
        <p class="- topic/p p">Cluster pipeline logs can grow in size over time, particularly for cluster streaming
            pipelines that run continuously. You can optionally configure the <span class="- topic/ph ph">Data Collector</span>
            installed on the gateway node to use the log4j rolling file appender to write log
            messages to an sdc.log file. This configuration is propagated to the worker nodes such
            that each <span class="- topic/ph ph">Data Collector</span>
            worker writes log messages to an sdc.log file within the YARN application directory. </p>
        <p class="- topic/p p">The log4j rolling file appender automatically rolls or archives the current log file and
            then resumes logging in another file. The
                <code class="+ topic/ph pr-d/codeph ph codeph">$SDC_CONF/sdc-log4j.properties</code> file configured for the <span class="- topic/ph ph">Data Collector</span>
            installed on the gateway node determines how frequently the rolling file appender rolls
            files. By default, it writes log messages to a maximum of 10 files, rolling over to the
            next file when the current file reaches a size of 256 MB.</p>
        <p class="- topic/p p">When you configure <span class="- topic/ph ph">Data Collector</span> to
            use the rolling file appender, you can view the log files for each worker node by using
            the YARN Resource Manager UI to locate the sdc.log file within the YARN application
            directory. </p>
        <div class="- topic/p p">To enable <span class="- topic/ph ph">Data Collector</span> to
            use the rolling file appender, add the following line to the <span class="- topic/ph ph">Data Collector</span>
            configuration file, <code class="+ topic/ph pr-d/codeph ph codeph">$SDC_CONF/sdc.properties</code>, defined on the gateway
            node:<pre class="+ topic/pre pr-d/codeblock pre codeblock"><code>cluster.pipelines.logging.to.stderr=false</code></pre></div>
    </div>
</article><article class="- topic/topic concept/concept topic concept nested1" aria-labelledby="ariaid-title7" id="concept_cs4_lcg_j5">
    <h2 class="- topic/title title topictitle2" id="ariaid-title7">Checkpoint Storage for Streaming Pipelines</h2>
    
    <div class="- topic/body concept/conbody body conbody"><p class="- topic/shortdesc shortdesc">When <span class="- topic/ph ph">Data Collector</span> runs a
        cluster streaming pipeline, <span class="- topic/ph ph">Data Collector</span>
        generates and stores checkpoint metadata. The checkpoint metadata provides the offset for
        the origin.</p>
        <div class="- topic/p p"><span class="- topic/ph ph">Data Collector</span>
            stores the checkpoint metadata in the following path on
            HDFS:<pre class="+ topic/pre pr-d/codeblock pre codeblock"><code>/user/$USER/.streamsets-spark-streaming/&lt;DataCollector ID&gt;/&lt;Kafka topic&gt;/&lt;consumer group&gt;/&lt;pipelineName&gt;</code></pre></div>
    </div>
</article><article class="- topic/topic concept/concept topic concept nested1" aria-labelledby="ariaid-title8" id="concept_xxz_nft_ls">
  <h2 class="- topic/title title topictitle2" id="ariaid-title8">Error Handling Limitation</h2>
  <div class="- topic/body concept/conbody body conbody">
    <div class="- topic/p p">Cluster
      pipelines currently have the following limitation on pipeline configuration options:<ul class="- topic/ul ul" id="concept_xxz_nft_ls__ul_vf1_zft_ls" data-ofbid="concept_xxz_nft_ls__ul_vf1_zft_ls">
        <li class="- topic/li li"><span class="+ topic/ph ui-d/uicontrol ph uicontrol">Error Records</span> - Write error records to Kafka or discard the
          records. Stopping the pipeline or writing records to file is not supported at this time.
        </li>
      </ul></div>
  </div>
</article><article class="- topic/topic concept/concept topic concept nested1" aria-labelledby="ariaid-title9" id="concept_fk4_gd4_1s">
 <h2 class="- topic/title title topictitle2" id="ariaid-title9">Monitoring and Snapshot</h2>
 <div class="- topic/body concept/conbody body conbody">
    <p class="- topic/p p">The <span class="- topic/ph ph">Data Collector</span> UI allows
      you to monitor each <span class="- topic/ph ph">Data Collector</span> worker. </p>
    <p class="- topic/p p">After you start a pipeline, the <span class="- topic/ph ph">Data Collector</span> UI
      displays basic monitoring information for the pipeline and links to each <span class="- topic/ph ph">Data Collector</span> worker.
      For monitoring details for a <span class="- topic/ph ph">Data Collector</span> worker,
      click the worker link. You can then view metrics and alerts for the worker. </p>
    <div class="- topic/p p">Metric and data alerts are defined for the pipeline, but triggered by individual workers.
      When you define a metric or data alert, each worker inherits the alert and triggers the alert
      based on the statistics for the worker.<div class="- topic/note note note note_note"><span class="note__title">Note:</span> You cannot take snapshots when monitoring cluster
        pipelines.</div></div>
  </div>
</article></article></article></main></div>

                        
                        
                        


                    </div>
                    
                </div>
            </div>


        </div> <nav class="navbar navbar-default wh_footer" data-whc_version="25.0">
  <div class=" footer-container  mx-auto">
    <!-- script for Data Collector, all flavors, but only used when accessed directly, not from portal --><script>
  // Google Analytics (analytics.js) bootstrap snippet, kept in its vendor-supplied minified form.
  // Called with (window, document, 'script', <library URL>, 'ga'), it:
  //   1. stores the global function name in window.GoogleAnalyticsObject;
  //   2. defines window.ga, unless it already exists, as a stub that appends each
  //      call's arguments to the ga.q array;
  //   3. records the current time in milliseconds in ga.l;
  //   4. creates a <script> element with async set and src pointing at the library URL,
  //      and inserts it before the first <script> element in the document.
  // NOTE(review): Google retired Universal Analytics properties ("UA-" IDs) in 2023.
  // Confirm whether this snippet still collects data or should be migrated or removed.
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  // Issue a 'create' command for property UA-60917135-3, then a 'send' command for a
  // pageview hit. Presumably 'auto' selects automatic cookie-domain configuration,
  // per the analytics.js documentation; verify if this is changed.
  ga('create', 'UA-60917135-3', 'auto');
  ga('send', 'pageview');
</script>
  </div>
</nav>

        
        <div id="go2top">
            <span class="oxy-icon oxy-icon-up"></span>
        </div>
        
        <!-- The modal container for images -->
        <div id="modal_img_large" class="modal">
            <span class="close oxy-icon oxy-icon-remove"></span>
            <!-- Modal Content (The Image) -->
            <div id="modal_img_container"></div>
            <!-- Modal Caption (Image Text) -->
            <div id="caption"></div>
        </div>
        
        
        &copy; 2023 StreamSets, Inc.

    </body>
</html>